# -*- coding: utf-8 -*-
# Copyright Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import mindspore.nn as nn
from mindspore.ops import operations as P
from mindformers.modules.layers import Linear
from mindformers.models.modeling_utils import PreTrainedModel
from mindformers.models.configuration_utils import PretrainedConfig


def conv_bn_relu(in_channel, out_channel, kernel_size, stride, depth_wise, activation='relu6'):
    output = [nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad_mode="same",
                        group=1 if not depth_wise else in_channel), nn.BatchNorm2d(out_channel)]
    if activation:
        output.append(nn.get_activation(activation))
    return nn.SequentialCell(output)


class TestNetMindSpore(nn.Cell):
    """
    MobileNet V1 backbone
    """

    def __init__(self, class_num=10, features_only=False):
        super(TestNetMindSpore, self).__init__()
        self.features_only = features_only
        cnn1 = [
            conv_bn_relu(3, 32, 3, 2, False),
        ]
        cnn2 = [
            conv_bn_relu(32, 32, 3, 2, False),
        ]
        self.network1 = nn.SequentialCell(cnn1)
        self.network2 = nn.SequentialCell(cnn2)
        self.fc = nn.Dense(32, class_num)

    def construct(self, x):
        output = x
        output = self.network1(output)
        output = self.network2(output)
        output = P.ReduceMean()(output, (2, 3))
        output = self.fc(output)
        return output


class TestNetMindSpore2(nn.Cell):
    """
    MobileNet V1 backbone
    """

    def __init__(self, class_num=10, features_only=False):
        super(TestNetMindSpore2, self).__init__()
        self.features_only = features_only
        cnn = [
            conv_bn_relu(3, 32, 3, 2, False),
        ]
        self.backbone = nn.SequentialCell(cnn)
        self.fc = nn.Dense(32, class_num)

    def construct(self, x):
        output = x
        output = self.backbone(output)
        output = P.ReduceMean()(output, (2, 3))
        output = self.fc(output)
        return output


class TestNetMindSpore3(nn.Cell):
    """
    MobileNet V1 backbone
    """

    def __init__(self, class_num=10, features_only=False):
        super().__init__()
        self.features_only = features_only
        cnn = [
            conv_bn_relu(3, 32, 3, 2, False),
        ]
        self.backbone = nn.SequentialCell(cnn)
        self.fc = nn.Dense(32, class_num)

    def construct(self, x, y):
        output = x
        output = self.backbone(output)
        output = P.ReduceMean()(output, (2, 3))
        output = self.fc(output)
        return output


class TestNetMindSpore4(nn.Cell):
    """
    TestNet
    """

    def __init__(self, class_num=10):
        super().__init__()
        self.backbone = nn.SequentialCell([conv_bn_relu(3, 64, 3, 2, False),
                                           conv_bn_relu(64, 32, 3, 1, False)])
        self.fc = nn.Dense(32, class_num)

    def forward(self, x):
        x = self.backbone(x)
        x = P.ReduceMean(keep_dims=True)(x)
        x = x.Flatten()
        x = self.fc(x)
        return x


def get_model():
    return TestNetMindSpore(class_num=10)


class SampleModel(nn.Cell):

    def __init__(self, num_class=10, num_channel=3):
        super(SampleModel, self).__init__()
        # 定义所需要的运算
        self.conv1 = nn.Conv2d(num_channel, 16, 3, pad_mode='valid')
        self.conv2 = nn.Conv2d(16, 16, 3, pad_mode='valid')
        self.fc1 = nn.Dense(16 * 3 * 3, 10)
        self.fc2 = nn.Dense(10, num_class)
        self.relu = nn.ReLU()

    def construct(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        y = self.relu(x)
        return y


class LrdSampleNetwork(nn.Cell):
    def __init__(self):
        super().__init__()
        self.embedding = nn.SequentialCell([
            nn.Embedding(16, 32),
            nn.Dense(32, 64),
            nn.ReLU(),
        ])

        self.feature = nn.SequentialCell([
            nn.Conv2d(64, 128, 3, pad_mode="pad", padding=1, has_bias=True),
            nn.ReLU(),
            nn.Conv2d(128, 64, 1, has_bias=True),
            nn.ReLU(),
        ])
        self.pool = nn.AvgPool2d((7, 7), stride=2)
        self.flatten = nn.Flatten()
        self.inner = nn.Dense(64 * 5 * 5, 512)
        self.classifier = nn.SequentialCell([
            nn.Dense(512, 256),
            nn.Dense(256, 10),
        ])

    def construct(self, inputs):
        shortcut = self.embedding(inputs)
        shortcut = shortcut.transpose([0, 3, 1, 2])
        next_node = self.feature(shortcut)
        next_node = next_node + shortcut
        next_node = self.pool(next_node)
        next_node = self.flatten(next_node)
        next_node = self.inner(next_node)
        next_node = self.classifier(next_node)
        return next_node


class MsTeacherModel(nn.Cell):
    def __init__(self):
        super().__init__()
        self.teacher_fc = nn.Dense(1, 1)

    def construct(self, inputs):
        logits = self.teacher_fc(inputs)
        loss = logits / 2
        return loss


class MsStudentModel(nn.Cell):
    def __init__(self):
        super().__init__()
        self.student_fc = nn.Dense(1, 1)

    def construct(self, inputs):
        logits = self.student_fc(inputs)
        loss = logits / 2
        return loss


class MsSparseModel(PreTrainedModel):
    def __init__(self, config: PretrainedConfig):
        super().__init__(config)
        self.q = Linear(256, 256)
        self.k = Linear(256, 256)
        self.v = Linear(256, 256)
    
    def construct(self, x):
        x1 = self.q(x)
        x2 = self.k(x)
        x3 = self.v(x)
        return x1 + x2 + x3
    